In [1]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE
from sklearn.datasets import make_circles
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.impute import SimpleImputer
import shap
import seaborn as sns
import numpy as np
In [2]:
pip install shap
Requirement already satisfied: shap in c:\users\viswanathan\anaconda4\lib\site-packages (0.46.0)
Requirement already satisfied: numpy in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (1.26.4)
Requirement already satisfied: scipy in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (1.13.1)
Requirement already satisfied: scikit-learn in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (1.4.2)
Requirement already satisfied: pandas in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (2.2.2)
Requirement already satisfied: tqdm>=4.27.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (4.66.4)
Requirement already satisfied: packaging>20.9 in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (23.2)
Requirement already satisfied: slicer==0.0.8 in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (0.0.8)
Requirement already satisfied: numba in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (0.59.1)
Requirement already satisfied: cloudpickle in c:\users\viswanathan\anaconda4\lib\site-packages (from shap) (2.2.1)
Requirement already satisfied: colorama in c:\users\viswanathan\anaconda4\lib\site-packages (from tqdm>=4.27.0->shap) (0.4.6)
Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in c:\users\viswanathan\anaconda4\lib\site-packages (from numba->shap) (0.42.0)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas->shap) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas->shap) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas->shap) (2023.3)
Requirement already satisfied: joblib>=1.2.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn->shap) (1.4.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn->shap) (2.2.0)
Requirement already satisfied: six>=1.5 in c:\users\viswanathan\anaconda4\lib\site-packages (from python-dateutil>=2.8.2->pandas->shap) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [3]:
df= pd.read_excel("C:/Users/Viswanathan/Desktop/Gestational Diabetic Data Set.xlsx")
In [4]:
df.dtypes
Out[4]:
Age                                  int64
No of Pregnancy                      int64
Gestation in previous Pregnancy      int64
BMI                                float64
HDL                                float64
Family History                       int64
unexplained prenetal loss            int64
Large Child or Birth Default         int64
PCOS                                 int64
Sys BP                             float64
Dia BP                               int64
OGTT                               float64
Hemoglobin                         float64
Sedentary Lifestyle                  int64
Prediabetes                          int64
Outcome                              int64
dtype: object
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3524 entries, 0 to 3523
Data columns (total 16 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              3524 non-null   int64  
 1   No of Pregnancy                  3524 non-null   int64  
 2   Gestation in previous Pregnancy  3524 non-null   int64  
 3   BMI                              2444 non-null   float64
 4   HDL                              2523 non-null   float64
 5   Family History                   3524 non-null   int64  
 6   unexplained prenetal loss        3524 non-null   int64  
 7   Large Child or Birth Default     3524 non-null   int64  
 8   PCOS                             3524 non-null   int64  
 9   Sys BP                           1819 non-null   float64
 10  Dia BP                           3524 non-null   int64  
 11  OGTT                             3012 non-null   float64
 12  Hemoglobin                       3524 non-null   float64
 13  Sedentary Lifestyle              3524 non-null   int64  
 14  Prediabetes                      3524 non-null   int64  
 15  Outcome                          3524 non-null   int64  
dtypes: float64(5), int64(11)
memory usage: 440.6 KB
In [6]:
import pandas

pandas.set_option('display.width', 150)
description = df.describe()
print(description)
               Age  No of Pregnancy  Gestation in previous Pregnancy          BMI          HDL  Family History  unexplained prenetal loss  \
count  3524.000000      3524.000000                      3524.000000  2444.000000  2523.000000     3524.000000                3524.000000   
mean     32.582860         2.076334                         0.960840    27.854092    46.468490        0.498297                   0.364926   
std       6.169265         0.838755                         0.722415     5.714737    10.801618        0.500068                   0.481478   
min      20.000000         1.000000                         0.000000    13.300000    15.000000        0.000000                   0.000000   
25%      28.000000         1.000000                         0.000000    24.400000    42.000000        0.000000                   0.000000   
50%      32.000000         2.000000                         1.000000    27.500000    49.000000        0.000000                   0.000000   
75%      37.000000         3.000000                         1.000000    31.000000    55.000000        1.000000                   1.000000   
max      45.000000         4.000000                         2.000000    45.000000    70.000000        1.000000                   1.000000   

       Large Child or Birth Default         PCOS       Sys BP       Dia BP         OGTT   Hemoglobin  Sedentary Lifestyle  Prediabetes      Outcome  
count                   3524.000000  3524.000000  1819.000000  3524.000000  3012.000000  3524.000000          3524.000000  3524.000000  3524.000000  
mean                       0.357832     0.264756   135.778999    81.543984   170.707503    13.959904             0.449773     0.293133     0.389330  
std                        0.479430     0.441266    22.737648    11.376111    48.160549     1.864223             0.497541     0.455263     0.487668  
min                        0.000000     0.000000    90.000000    60.000000    80.000000     8.800000             0.000000     0.000000     0.000000  
25%                        0.000000     0.000000   122.000000    74.000000   142.000000    12.700000             0.000000     0.000000     0.000000  
50%                        0.000000     0.000000   132.000000    81.000000   156.000000    14.000000             0.000000     0.000000     0.000000  
75%                        1.000000     1.000000   153.000000    86.000000   195.000000    15.000000             1.000000     1.000000     1.000000  
max                        1.000000     1.000000   185.000000   124.000000   403.000000    18.000000             1.000000     1.000000     1.000000  
In [7]:
df.hist(figsize=(15, 10), bins=20)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [80]:
 

plt.hist(df["Outcome"], bins=30, color='green', edgecolor='black')
 
# Adding labels and title
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Outcome variable - GDM (1) and Non GDM (0)')
 
# Display the plot
plt.show()
No description has been provided for this image
In [9]:
skew = df.skew()
print(skew)
Age                                0.050452
No of Pregnancy                    0.317600
Gestation in previous Pregnancy    0.058916
BMI                                0.524710
HDL                               -0.917368
Family History                     0.006813
unexplained prenetal loss          0.561399
Large Child or Birth Default       0.593407
PCOS                               1.066829
Sys BP                             0.087402
Dia BP                             0.695347
OGTT                               1.495513
Hemoglobin                         0.187583
Sedentary Lifestyle                0.202015
Prediabetes                        0.909296
Outcome                            0.454131
dtype: float64
In [10]:
sns.pairplot(df, hue='Outcome')
plt.title('Multivariate Analysis')
plt.show()
No description has been provided for this image
In [11]:
# To find the outlier in the numerical data
plt.figure(figsize=(15, 10))
sns.boxplot(data=df, orient='h')
plt.show()
No description has been provided for this image
In [12]:
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()
No description has been provided for this image
In [10]:
df=df.drop(['HDL', 'unexplained prenetal loss','Sys BP','Dia BP','OGTT', 'Hemoglobin','Sedentary Lifestyle','Prediabetes'],axis=1)
print(df)
      Age  No of Pregnancy  Gestation in previous Pregnancy   BMI  Family History  Large Child or Birth Default  PCOS  Outcome
0      22                2                                1   NaN               0                             0     0        0
1      26                2                                1   NaN               0                             0     0        0
2      29                1                                0   NaN               0                             0     0        0
3      28                2                                1   NaN               0                             0     0        0
4      21                2                                1   NaN               0                             0     0        0
...   ...              ...                              ...   ...             ...                           ...   ...      ...
3519   31                4                                1  24.1               0                             1     0        1
3520   26                3                                1  34.5               1                             0     1        1
3521   35                2                                2  23.6               1                             1     0        1
3522   37                2                                0  23.3               1                             1     1        0
3523   43                2                                0  28.6               1                             0     0        0

[3524 rows x 8 columns]
In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3524 entries, 0 to 3523
Data columns (total 8 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              3524 non-null   int64  
 1   No of Pregnancy                  3524 non-null   int64  
 2   Gestation in previous Pregnancy  3524 non-null   int64  
 3   BMI                              2444 non-null   float64
 4   Family History                   3524 non-null   int64  
 5   Large Child or Birth Default     3524 non-null   int64  
 6   PCOS                             3524 non-null   int64  
 7   Outcome                          3524 non-null   int64  
dtypes: float64(1), int64(7)
memory usage: 220.4 KB
In [15]:
plt.figure(figsize=(10, 6))
sns.heatmap(df.isnull(), cbar=False, cmap='viridis')
plt.title('Identifying Missing Values')
plt.show()
No description has been provided for this image
In [13]:
X, y = make_circles(n_samples=100, noise=0.0, random_state=1)
plt.scatter(X[:, 0], X[:, 1])
plt.show()
No description has been provided for this image
In [16]:
X=df[["Age", "No of Pregnancy", "Gestation in previous Pregnancy","Family History", "Large Child or Birth Default", "PCOS"]]
y=df["Outcome"]
from sklearn.feature_selection import RFE

from sklearn.svm import SVR
estimator = SVR(kernel="linear")

selector = RFE(estimator, n_features_to_select=5, step=1)

selector.fit(X, y)

print(selector.support_)

print(selector.ranking_)
[ True  True  True  True False  True]
[1 1 1 1 2 1]
In [12]:
X=df[["Age", "No of Pregnancy", "Gestation in previous Pregnancy","BMI","Family History", "Large Child or Birth Default", "PCOS"]]
y=df["Outcome"]
In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=1, stratify=y)
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))
X_train shape: (1762, 7)
X_test shape: (1762, 7)
y_train shape: (1762,)
y_test shape: (1762,)
In [14]:
X_train.isnull().sum()
Out[14]:
Age                                  0
No of Pregnancy                      0
Gestation in previous Pregnancy      0
BMI                                546
Family History                       0
Large Child or Birth Default         0
PCOS                                 0
dtype: int64
In [15]:
X_test.isnull().sum()
Out[15]:
Age                                  0
No of Pregnancy                      0
Gestation in previous Pregnancy      0
BMI                                534
Family History                       0
Large Child or Birth Default         0
PCOS                                 0
dtype: int64
In [16]:
y_train.isnull().sum()
Out[16]:
0
In [17]:
y_test.isnull().sum()
Out[17]:
0
In [18]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X_train[["BMI"]],X_test[["BMI"]])
X_train["BMI"] = imputer.transform(X_train[["BMI"]])
X_test["BMI"] = imputer.transform(X_test[["BMI"]])
In [19]:
X_train.isnull().sum()
Out[19]:
Age                                0
No of Pregnancy                    0
Gestation in previous Pregnancy    0
BMI                                0
Family History                     0
Large Child or Birth Default       0
PCOS                               0
dtype: int64
In [20]:
X_test.isnull().sum()
Out[20]:
Age                                0
No of Pregnancy                    0
Gestation in previous Pregnancy    0
BMI                                0
Family History                     0
Large Child or Birth Default       0
PCOS                               0
dtype: int64
In [27]:
model = AdaBoostClassifier()
grid = dict()
grid['n_estimators'] = [10, 50, 100, 500]
grid['learning_rate'] = [0.0001, 0.001, 0.01, 0.1, 1.0]
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy')
grid_result = grid_search.fit(X_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
Best: 0.962724 using {'learning_rate': 1.0, 'n_estimators': 50}
0.898993 (0.016641) with: {'learning_rate': 0.0001, 'n_estimators': 10}
0.898993 (0.016641) with: {'learning_rate': 0.0001, 'n_estimators': 50}
0.898993 (0.016641) with: {'learning_rate': 0.0001, 'n_estimators': 100}
0.898993 (0.016641) with: {'learning_rate': 0.0001, 'n_estimators': 500}
0.898993 (0.016641) with: {'learning_rate': 0.001, 'n_estimators': 10}
0.898993 (0.016641) with: {'learning_rate': 0.001, 'n_estimators': 50}
0.898993 (0.016641) with: {'learning_rate': 0.001, 'n_estimators': 100}
0.898993 (0.016641) with: {'learning_rate': 0.001, 'n_estimators': 500}
0.898993 (0.016641) with: {'learning_rate': 0.01, 'n_estimators': 10}
0.898993 (0.016641) with: {'learning_rate': 0.01, 'n_estimators': 50}
0.912047 (0.020003) with: {'learning_rate': 0.01, 'n_estimators': 100}
0.937962 (0.021217) with: {'learning_rate': 0.01, 'n_estimators': 500}
0.912047 (0.020003) with: {'learning_rate': 0.1, 'n_estimators': 10}
0.940041 (0.018289) with: {'learning_rate': 0.1, 'n_estimators': 50}
0.944389 (0.017036) with: {'learning_rate': 0.1, 'n_estimators': 100}
0.958765 (0.016563) with: {'learning_rate': 0.1, 'n_estimators': 500}
0.945892 (0.015780) with: {'learning_rate': 1.0, 'n_estimators': 10}
0.962724 (0.014504) with: {'learning_rate': 1.0, 'n_estimators': 50}
0.961406 (0.014973) with: {'learning_rate': 1.0, 'n_estimators': 100}
0.958188 (0.013406) with: {'learning_rate': 1.0, 'n_estimators': 500}
In [28]:
rfc = RandomForestClassifier()
forest_params = {
    'bootstrap': [True,False],
    'max_depth': [2,4],
    'max_features': ['auto','sqrt'],
    'min_samples_leaf': [1,2],
    'min_samples_split': [2],
    'n_estimators': [100]
}

Randomforest = GridSearchCV(rfc, param_grid= forest_params, cv = 10, n_jobs=-1,verbose=2)

Randomforest.fit(X_train, y_train)
print(Randomforest.best_params_)
Fitting 10 folds for each of 16 candidates, totalling 160 fits
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py:547: FitFailedWarning: 
80 fits failed out of a total of 160.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'max_features' parameter of RandomForestClassifier must be an int in the range [1, inf), a float in the range (0.0, 1.0], a str among {'log2', 'sqrt'} or None. Got 'auto' instead.

--------------------------------------------------------------------------------
62 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'max_features' parameter of RandomForestClassifier must be an int in the range [1, inf), a float in the range (0.0, 1.0], a str among {'sqrt', 'log2'} or None. Got 'auto' instead.

  warnings.warn(some_fits_failed_message, FitFailedWarning)
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_search.py:1051: UserWarning: One or more of the test scores are non-finite: [       nan        nan 0.94041153 0.94836929        nan        nan
 0.95460645 0.95687275        nan        nan 0.93417437 0.94041474
        nan        nan 0.9557396  0.95403826]
  warnings.warn(
{'bootstrap': True, 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
In [29]:
LR = LogisticRegression()
LR_params=  {'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'C' : np.logspace(-4, 4, 20),
    'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],
    'max_iter' : [100]}
Logisticregressionclassifier = GridSearchCV(estimator=LR, param_grid = LR_params, cv = 10, verbose=True, n_jobs=-1)
Logisticregressionclassifier.fit(X_train,y_train)
print(Logisticregressionclassifier.best_params_)
Fitting 10 folds for each of 400 candidates, totalling 4000 fits
{'C': 11.288378916846883, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py:547: FitFailedWarning: 
2600 fits failed out of a total of 4000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 67, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or None penalties, got l1 penalty.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 67, in _check_solver
    raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or None penalties, got l1 penalty.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 67, in _check_solver
    raise ValueError(
ValueError: Solver sag supports only 'l2' or None penalties, got l1 penalty.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 67, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or None penalties, got elasticnet penalty.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 67, in _check_solver
    raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or None penalties, got elasticnet penalty.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 75, in _check_solver
    raise ValueError(
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1172, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 67, in _check_solver
    raise ValueError(
ValueError: Solver sag supports only 'l2' or None penalties, got elasticnet penalty.

--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1182, in fit
    raise ValueError("l1_ratio must be specified when penalty is elasticnet.")
ValueError: l1_ratio must be specified when penalty is elasticnet.

--------------------------------------------------------------------------------
302 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'penalty' parameter of LogisticRegression must be a str among {'l2', 'l1', 'elasticnet'} or None. Got 'none' instead.

--------------------------------------------------------------------------------
126 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'penalty' parameter of LogisticRegression must be a str among {'l1', 'l2', 'elasticnet'} or None. Got 'none' instead.

--------------------------------------------------------------------------------
146 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'penalty' parameter of LogisticRegression must be a str among {'l2', 'elasticnet', 'l1'} or None. Got 'none' instead.

--------------------------------------------------------------------------------
178 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'penalty' parameter of LogisticRegression must be a str among {'l1', 'elasticnet', 'l2'} or None. Got 'none' instead.

--------------------------------------------------------------------------------
248 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'penalty' parameter of LogisticRegression must be a str among {'elasticnet', 'l1', 'l2'} or None. Got 'none' instead.

  warnings.warn(some_fits_failed_message, FitFailedWarning)
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\model_selection\_search.py:1051: UserWarning: One or more of the test scores are non-finite: [       nan        nan 0.61067026        nan 0.61067026 0.78492874
 0.78492874 0.61067026 0.77357152 0.68561248        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.61067026        nan
 0.61067026 0.80933166 0.80933166 0.61521251 0.80136749 0.70206728
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.61067026        nan 0.61067026 0.83771507 0.83771507 0.68502183
 0.8297509  0.78830573        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan 0.61067026        nan 0.65891436 0.86494928
 0.86494928 0.7729584  0.86095917 0.830868          nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.61067026        nan
 0.71113893 0.88877119 0.88877119 0.80984527 0.86550462 0.86377761
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.85694979        nan 0.85696905 0.89899204 0.89956022 0.87228107
 0.91089818 0.90010593        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan 0.88420005        nan 0.91486261 0.91092065
 0.91092065 0.8989792  0.91544042 0.91145994        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.9160086         nan
 0.91429443 0.92112866 0.92112866 0.91430085 0.91714176 0.9125963
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.92565806        nan 0.91373267 0.92452812 0.92452812 0.91827812
 0.9188463  0.91316448        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan 0.92736582        nan 0.91543721 0.92622625
 0.92622625 0.92111582 0.92054764 0.91486903        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.92793079        nan
 0.91543721 0.92679443 0.92679443 0.92622625 0.921684   0.91430085
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.92793079        nan 0.91600218 0.92679764 0.92793079 0.92793079
 0.921684   0.915434          nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan 0.92793079        nan 0.915434   0.92793079
 0.92793079 0.92849897 0.921684   0.91600218        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.92793079        nan
 0.91600218 0.92736261 0.92793079 0.92793079 0.92225218 0.915434
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.92793079        nan 0.915434   0.92849576 0.92793079 0.92793079
 0.921684   0.915434          nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan 0.92793079        nan 0.915434   0.92736261
 0.92793079 0.92793079 0.921684   0.915434          nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.92793079        nan
 0.915434   0.92793079 0.92793079 0.92793079 0.92282036 0.91600218
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.92793079        nan 0.91600218 0.92849576 0.92793079 0.92793079
 0.92225218 0.915434          nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan 0.92793079        nan 0.915434   0.92793079
 0.92793079 0.92793079 0.921684   0.91600218        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan 0.92793079        nan
 0.915434   0.92793079 0.92793079 0.92793079 0.921684   0.915434
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan]
  warnings.warn(
In [30]:
NB=GaussianNB()
NB_params = {'var_smoothing': np.logspace(0,-9, num=100)}
Naivebayes=GridSearchCV(estimator=NB, 
                     param_grid=NB_params, 
                     cv=10,
                     verbose=1, 
                     scoring='accuracy')
Naivebayes.fit(X_train,y_train)
print(Naivebayes.best_params_)
print(Naivebayes.best_estimator_)
Fitting 10 folds for each of 100 candidates, totalling 1000 fits
{'var_smoothing': 0.0004328761281083057}
GaussianNB(var_smoothing=0.0004328761281083057)
In [ ]:
svc=SVC()
svc_params = {'C': [0.1, 1, 10, 100],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['linear','rbf','poly','sigmoid']}  
  
Supportvectorclassifier = GridSearchCV(estimator=svc, param_grid=svc_params, refit = True, verbose = 3) 
Supportvectorclassifier.fit(X_train,y_train)
print(Supportvectorclassifier.best_params_)
Fitting 5 folds for each of 80 candidates, totalling 400 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.926 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.926 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.926 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.938 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.946 total time=   0.0s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.847 total time=   0.0s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.836 total time=   0.0s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.835 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.810 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.793 total time=   0.0s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.958 total time= 2.2min
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.960 total time= 1.2min
[CV 3/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.957 total time= 2.2min
In [ ]:
from sklearn.tree import DecisionTreeClassifier
Dtree = DecisionTreeClassifier()
Dtree_params= {
    'max_depth': [10, 20, 30,None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4] }
Decisiontree = GridSearchCV(Dtree, param_grid= Dtree_params, cv = 10, scoring='accuracy')
Decisiontree.fit(X_train, y_train)
print(Decisiontree.best_params_)
In [21]:
sns.FacetGrid(df, hue="Outcome", height=5).map(sns.histplot, "Age", kde=True).add_legend()
plt.title('Age Distribution ')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()
No description has been provided for this image
In [ ]:
plt.hist(df["Target"])
plt.xlabel('Target')
plt.title('Target distribution of GDM-1 vs NonGDM-0')
 
plt.show()
In [46]:
df_crosstab = pd.crosstab(df.PCOS, df.Target, normalize='index')
# Create clustermap
g = sns.clustermap(df_crosstab, annot=True, cmap='Blues', fmt=".2f", col_cluster=False, cbar=False, cbar_pos=None)
# Add title
g.fig.suptitle('Clustermap')
# Plot figure
plt.show()
No description has been provided for this image
In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
In [22]:
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(bootstrap = True, max_depth = 4, max_features = "sqrt", min_samples_leaf = 2, min_samples_split = 2, n_estimators= 100)
rfc.fit(X_train, y_train)
Out[22]:
RandomForestClassifier(max_depth=4, min_samples_leaf=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=4, min_samples_leaf=2)
In [28]:
rfc_train = rfc.predict(X_train)
from sklearn import metrics
print("Training Accuracy =", format(metrics.accuracy_score(y_train, rfc_train)))
Training Accuracy = 0.9585698070374574
In [29]:
rfc_predictions = rfc.predict(X_test)
print("Test Accuracy =", format(metrics.accuracy_score(y_test, rfc_predictions)))
Test Accuracy = 0.9523269012485811
In [74]:
from sklearn.metrics import RocCurveDisplay
rfc_disp = RocCurveDisplay.from_estimator(rfc, X_test, y_test)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title("Random Forest AUC_ROC")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.show()
No description has been provided for this image
In [30]:
rfc.feature_importances_
sorted_idx = rfc.feature_importances_.argsort()
pd.Series(rfc.feature_importances_[sorted_idx],index=X.columns).plot(kind='barh')
plt.title("Random Forest Feature Importance")
Out[30]:
Text(0.5, 1.0, 'Random Forest Feature Importance')
No description has been provided for this image
In [119]:
import shap
from matplotlib import pyplot as plt
shap.initjs()
explainer = shap.TreeExplainer(rfc)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)
importance = rfc.feature_importances_
# summarize feature importance
for i,v in enumerate(importance):
 print('Feature: %0d, Score: %.5f' % (i,v))
# plot feature importance
plt.barh([x for x in range(len(importance))], importance)
plt.title("Random Forest SHAP model")
No description has been provided for this image
No description has been provided for this image
Feature: 0, Score: 0.10743
Feature: 1, Score: 0.01339
Feature: 2, Score: 0.21647
Feature: 3, Score: 0.37339
Feature: 4, Score: 0.02617
Feature: 5, Score: 0.03171
Feature: 6, Score: 0.23144
Out[119]:
Text(0.5, 1.0, 'Random Forest SHAP model')
No description has been provided for this image
In [116]:
plot_learning_curves(X_train, y_train, X_test, y_test, rfc)
Out[116]:
([0.03409090909090909,
  0.03409090909090909,
  0.04734848484848485,
  0.045454545454545456,
  0.04426787741203178,
  0.04446546830652791,
  0.047039740470397405,
  0.0454222853087296,
  0.0416403785488959,
  0.04029511918274688],
 [0.07434733257661748,
  0.05051078320090806,
  0.05278093076049943,
  0.051645856980703744,
  0.051645856980703744,
  0.04937570942111237,
  0.04880817253121453,
  0.04767309875141884,
  0.04937570942111237,
  0.048240635641316684])
No description has been provided for this image
In [27]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(rfc, X_test, y_test, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())
print("Standard deviation:", cv_scores.std())
Cross-validation scores: [0.93785311 0.96610169 0.94886364 0.97159091 0.96590909 0.91477273
 0.95454545 0.94886364 0.97159091 0.94886364]
Mean accuracy: 0.9528954802259888
Standard deviation: 0.016655039102326165
In [87]:
pip install probatus
Requirement already satisfied: probatus in c:\users\viswanathan\anaconda4\lib\site-packages (3.1.2)
Requirement already satisfied: scikit-learn>=0.22.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (1.4.2)
Requirement already satisfied: pandas>=1.0.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (2.2.2)
Requirement already satisfied: matplotlib>=3.1.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (3.8.4)
Requirement already satisfied: joblib>=0.13.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (1.4.2)
Requirement already satisfied: shap>=0.43.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (0.46.0)
Requirement already satisfied: numpy<2.0.0,>=1.23.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (1.26.4)
Requirement already satisfied: numba>=0.57.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (0.59.1)
Requirement already satisfied: loguru>=0.7.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from probatus) (0.7.2)
Requirement already satisfied: colorama>=0.3.4 in c:\users\viswanathan\anaconda4\lib\site-packages (from loguru>=0.7.2->probatus) (0.4.6)
Requirement already satisfied: win32-setctime>=1.0.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from loguru>=0.7.2->probatus) (1.1.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (23.2)
Requirement already satisfied: pillow>=8 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.1.1->probatus) (2.9.0.post0)
Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in c:\users\viswanathan\anaconda4\lib\site-packages (from numba>=0.57.0->probatus) (0.42.0)
Requirement already satisfied: pytz>=2020.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas>=1.0.0->probatus) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas>=1.0.0->probatus) (2023.3)
Requirement already satisfied: scipy>=1.6.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn>=0.22.2->probatus) (1.13.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn>=0.22.2->probatus) (2.2.0)
Requirement already satisfied: tqdm>=4.27.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from shap>=0.43.0->probatus) (4.66.4)
Requirement already satisfied: slicer==0.0.8 in c:\users\viswanathan\anaconda4\lib\site-packages (from shap>=0.43.0->probatus) (0.0.8)
Requirement already satisfied: cloudpickle in c:\users\viswanathan\anaconda4\lib\site-packages (from shap>=0.43.0->probatus) (2.2.1)
Requirement already satisfied: six>=1.5 in c:\users\viswanathan\anaconda4\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.1.1->probatus) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [31]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn import metrics
svc_model = SVC(C=0.1, gamma=1, kernel="linear")
svc_model.fit(X_train, y_train)
svc_pred = svc_model.predict(X_test)
print("Test Accuracy =", format(metrics.accuracy_score(y_test, svc_pred)))
Test Accuracy = 0.9262202043132803
In [32]:
from mlxtend.plotting import plot_learning_curves
 
# Plot the learning curves
plot_learning_curves(X_train, y_train, X_test, y_test, svc_model)
Out[32]:
([0.11931818181818182,
  0.11363636363636363,
  0.10416666666666667,
  0.09232954545454546,
  0.07832009080590238,
  0.07663197729422895,
  0.07218167072181671,
  0.07026259758694109,
  0.06687697160883281,
  0.06696935300794551],
 [0.12996594778660614,
  0.10102156640181612,
  0.09648127128263337,
  0.0891032917139614,
  0.07604994324631101,
  0.07491486946651532,
  0.07377979568671963,
  0.07377979568671963,
  0.07377979568671963,
  0.07377979568671963])
No description has been provided for this image
In [100]:
pip install mlxtend 
Requirement already satisfied: mlxtend in c:\users\viswanathan\anaconda4\lib\site-packages (0.23.1)
Requirement already satisfied: scipy>=1.2.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from mlxtend) (1.13.1)
Requirement already satisfied: numpy>=1.16.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from mlxtend) (1.26.4)
Requirement already satisfied: pandas>=0.24.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from mlxtend) (2.2.2)
Requirement already satisfied: scikit-learn>=1.0.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from mlxtend) (1.4.2)
Requirement already satisfied: matplotlib>=3.0.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from mlxtend) (3.8.4)
Requirement already satisfied: joblib>=0.13.2 in c:\users\viswanathan\anaconda4\lib\site-packages (from mlxtend) (1.4.2)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (23.2)
Requirement already satisfied: pillow>=8 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\viswanathan\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas>=0.24.2->mlxtend) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\viswanathan\anaconda4\lib\site-packages (from pandas>=0.24.2->mlxtend) (2023.3)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn>=1.0.2->mlxtend) (2.2.0)
Requirement already satisfied: six>=1.5 in c:\users\viswanathan\anaconda4\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [33]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(svc_model, X_test, y_test, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())
print("Standard deviation:", cv_scores.std())
Cross-validation scores: [0.88135593 0.8700565  0.88700565 0.86931818 0.875      0.88068182
 0.90340909 0.875      0.86931818 0.90340909]
Mean accuracy: 0.881455444273241
Standard deviation: 0.01226069401771223
In [33]:
from sklearn.tree import DecisionTreeClassifier

dtree = DecisionTreeClassifier(max_depth= 20, min_samples_leaf=1, min_samples_split=2)
dtree.fit(X_train, y_train)
dtree_predictions = dtree.predict(X_test)
print("Test Accuracy =", format(metrics.accuracy_score(y_test,dtree_predictions)))
Test Accuracy = 0.9687854710556186
In [72]:
from sklearn.metrics import RocCurveDisplay
dtree_disp = RocCurveDisplay.from_estimator(dtree, X_test, y_test)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title("Decision Tree AUC_ROC")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.show()
No description has been provided for this image
In [106]:
plot_learning_curves(X_train, y_train, X_test, y_test, dtree)
Out[106]:
([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.05788876276958002,
  0.04029511918274688,
  0.02894438138479001,
  0.031214528944381384,
  0.036322360953461974,
  0.030079455164585697,
  0.03064699205448354,
  0.03348467650397276,
  0.03348467650397276,
  0.03234960272417707])
No description has been provided for this image
In [120]:
import shap
from matplotlib import pyplot as plt
shap.initjs()
explainer = shap.TreeExplainer(dtree)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)
importance = dtree.feature_importances_
# summarize feature importance
for i,v in enumerate(importance):
 print('Feature: %0d, Score: %.5f' % (i,v))
# plot feature importance
plt.barh([x for x in range(len(importance))], importance)
plt.title("Decision Tree SHAP model")
No description has been provided for this image
No description has been provided for this image
Feature: 0, Score: 0.08642
Feature: 1, Score: 0.01990
Feature: 2, Score: 0.06003
Feature: 3, Score: 0.65631
Feature: 4, Score: 0.02074
Feature: 5, Score: 0.01880
Feature: 6, Score: 0.13779
Out[120]:
Text(0.5, 1.0, 'Decision Tree SHAP model')
No description has been provided for this image
In [36]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(dtree, X_test, y_test, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())
print("Standard deviation:", cv_scores.std())
Cross-validation scores: [0.97740113 0.98305085 0.97175141 0.96022727 0.96590909 0.95454545
 0.94886364 0.96022727 0.97159091 0.95454545]
Mean accuracy: 0.9648112480739599
Standard deviation: 0.010457762990250148
In [52]:
import shap
shap.initjs()
explainer = shap.TreeExplainer(dtree)
expected_value = explainer.expected_value
shap_interaction_values = treeExplainer.shap_interaction_values(x1)
shap.summary_plot(shap_interaction_values, features=x1, max_display=4)
No description has been provided for this image
Explainer expected value: [0.61066969 0.38933031]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[52], line 12
     10 features = X_test.iloc[0:7]
     11 features_display = X_test.loc[features.index]
---> 12 shap.decision_plot(expected_value, shap_values, features_display, link="logit")

File ~\anaconda4\Lib\site-packages\shap\plots\_decision.py:406, in decision(base_value, shap_values, features, feature_names, feature_order, feature_display_range, highlight, link, plot_color, axis_color, y_demarc_color, alpha, color_bar, auto_size_plot, title, xlim, show, return_objects, ignore_warnings, new_base_value, legend_labels, legend_location)
    403 if shap_values.ndim == 3:
    404     # flatten
    405     triu_count = feature_count * (feature_count - 1) // 2
--> 406     idx_diag = np.diag_indices_from(shap_values[0])
    407     idx_triu = np.triu_indices_from(shap_values[0], 1)
    408     a = np.ndarray((observation_count, feature_count + triu_count), shap_values.dtype)

File ~\anaconda4\Lib\site-packages\numpy\lib\index_tricks.py:1044, in diag_indices_from(arr)
   1041 # For more than d=2, the strided formula is only valid for arrays with
   1042 # all dimensions equal, so we check first.
   1043 if not np.all(diff(arr.shape) == 0):
-> 1044     raise ValueError("All dimensions of input must be of equal length")
   1046 return diag_indices(arr.shape[0], arr.ndim)

ValueError: All dimensions of input must be of equal length
In [34]:
rfc_predictions = rfc.predict(X_test)
print("Test Accuracy =", format(metrics.accuracy_score(y_test, rfc_predictions)))
Test Accuracy = 0.9523269012485811
In [35]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(C=545.5594781168514, max_iter =100, penalty= 'l1', solver= 'liblinear')

LR.fit(X_train, y_train)
LR_predictions = LR.predict(X_test)
print("Test Accuracy =", format(metrics.accuracy_score(y_test,LR_predictions)))
Test Accuracy = 0.9233825198637912
In [75]:
from sklearn.metrics import RocCurveDisplay
LR_disp = RocCurveDisplay.from_estimator(LR, X_test, y_test)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title("Logistic Regression AUC_ROC")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.show()
No description has been provided for this image
In [109]:
plot_learning_curves(X_train, y_train, X_test, y_test, LR)
Out[109]:
([0.03977272727272727,
  0.08238636363636363,
  0.08522727272727272,
  0.07954545454545454,
  0.08059023836549375,
  0.08325449385052035,
  0.07380373073803731,
  0.07381121362668559,
  0.07318611987381704,
  0.07150964812712826],
 [0.07434733257661748,
  0.08626560726447219,
  0.08569807037457434,
  0.08342792281498297,
  0.08229284903518728,
  0.08399545970488081,
  0.07491486946651532,
  0.07491486946651532,
  0.0771850170261067,
  0.07661748013620885])
No description has been provided for this image
In [58]:
import shap
shap.initjs()
explainer = shap.TreeExplainer(LR)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)
importance = LR.feature_importances_
# summarize feature importance
for i,v in enumerate(importance):
 print('Feature: %0d, Score: %.5f' % (i,v))
# plot feature importance
pyplot.bar([x for x in range(len(importance))], importance)
pyplot.show()
No description has been provided for this image
---------------------------------------------------------------------------
InvalidModelError                         Traceback (most recent call last)
Cell In[58], line 3
      1 import shap
      2 shap.initjs()
----> 3 explainer = shap.TreeExplainer(LR)
      4 shap_values = explainer.shap_values(X_test)
      5 shap.summary_plot(shap_values, X_test)

File ~\anaconda4\Lib\site-packages\shap\explainers\_tree.py:195, in TreeExplainer.__init__(self, model, data, model_output, feature_perturbation, feature_names, approximate, link, linearize_link)
    193 self.feature_perturbation = feature_perturbation
    194 self.expected_value = None
--> 195 self.model = TreeEnsemble(model, self.data, self.data_missing, model_output)
    196 self.model_output = model_output
    197 #self.model_output = self.model.model_output # this allows the TreeEnsemble to translate model outputs types by how it loads the model

File ~\anaconda4\Lib\site-packages\shap\explainers\_tree.py:1217, in TreeEnsemble.__init__(self, model, data, data_missing, model_output)
   1215     self.base_offset = model.init_params[param_idx]
   1216 else:
-> 1217     raise InvalidModelError("Model type not yet supported by TreeExplainer: " + str(type(model)))
   1219 # build a dense numpy version of all the tree objects
   1220 if self.trees is not None and self.trees:

InvalidModelError: Model type not yet supported by TreeExplainer: <class 'sklearn.linear_model._logistic.LogisticRegression'>
In [39]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(LR, X_test, y_test, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())
print("Standard deviation:", cv_scores.std())
Cross-validation scores: [0.90960452 0.89830508 0.92655367 0.90909091 0.92045455 0.93181818
 0.91477273 0.90909091 0.91477273 0.92613636]
Mean accuracy: 0.9160599640472521
Standard deviation: 0.009678034024275178
In [ ]:
 
In [36]:
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
Naivebayes = GaussianNB(var_smoothing=0.0001873817422860383)
Naivebayes.fit(X_train, y_train)
Naivebayes_pred =Naivebayes.predict(X_test)
print ("Test Accuracy:", metrics.accuracy_score (y_test, Naivebayes_pred))
Test Accuracy: 0.9177071509648127
In [76]:
from sklearn.metrics import RocCurveDisplay
Naivebayes_disp = RocCurveDisplay.from_estimator(Naivebayes, X_test, y_test)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title("Naive Bayes AUC_ROC")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.show()
No description has been provided for this image
In [112]:
plot_learning_curves(X_train, y_train, X_test, y_test, Naivebayes)
Out[112]:
([0.07386363636363637,
  0.10227272727272728,
  0.09090909090909091,
  0.08806818181818182,
  0.09080590238365494,
  0.09271523178807947,
  0.08759124087591241,
  0.085166784953868,
  0.08012618296529968,
  0.08059023836549375],
 [0.09194097616345062,
  0.09023836549375709,
  0.08172531214528944,
  0.08399545970488081,
  0.0851305334846765,
  0.08626560726447219,
  0.08172531214528944,
  0.08172531214528944,
  0.08229284903518728,
  0.08229284903518728])
No description has been provided for this image
In [42]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(Naivebayes, X_test, y_test, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())
print("Standard deviation:", cv_scores.std())
Cross-validation scores: [0.92090395 0.88700565 0.92090395 0.89772727 0.92613636 0.91477273
 0.92045455 0.89772727 0.92045455 0.92045455]
Mean accuracy: 0.9126540832049306
Standard deviation: 0.012682209655323906
In [37]:
from sklearn.ensemble import AdaBoostClassifier
AdaB = AdaBoostClassifier(n_estimators=100,
                         learning_rate=1.0)
AdaB.fit(X_train, y_train)
AdaB_predictions = AdaB.predict(X_test)
print("Test Accuracy =", format(metrics.accuracy_score(y_test,AdaB_predictions)))
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
Test Accuracy = 0.9466515323496028
In [114]:
plot_learning_curves(X_train, y_train, X_test, y_test,AdaB)
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
Out[114]:
([0.0,
  0.008522727272727272,
  0.017045454545454544,
  0.03551136363636364,
  0.02724177071509648,
  0.027436140018921477,
  0.030008110300081103,
  0.0319375443577005,
  0.029022082018927444,
  0.028376844494892167],
 [0.05959137343927355,
  0.060158910329171394,
  0.058456299659477864,
  0.06129398410896708,
  0.053348467650397274,
  0.055051078320090804,
  0.04767309875141884,
  0.05448354143019296,
  0.05448354143019296,
  0.053348467650397274])
No description has been provided for this image
In [44]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(AdaB, X_test, y_test, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())
print("Standard deviation:", cv_scores.std())
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
Cross-validation scores: [0.93220339 0.96610169 0.96045198 0.9375     0.96590909 0.92613636
 0.94886364 0.93181818 0.9375     0.94886364]
Mean accuracy: 0.9455347971237803
Standard deviation: 0.013978844187123876
C:\Users\Viswanathan\anaconda4\Lib\site-packages\sklearn\ensemble\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.
  warnings.warn(
In [120]:
import shap
shap.initjs()
explainer = shap(AdaB)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)
No description has been provided for this image
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[120], line 3
      1 import shap
      2 shap.initjs()
----> 3 explainer = shap(AdaB)
      4 shap_values = explainer.shap_values(X_test)
      5 shap.summary_plot(shap_values, X_test)

TypeError: 'module' object is not callable
In [68]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import shap
import sklearn

print("Pandas version:", pd.__version__)
print("NumPy version:", np.__version__)
print("Seaborn version:", sns.__version__)
print("Matplotlib version:", matplotlib.__version__)
print("SHAP version:", shap.__version__)
print("Sklearn version:", sklearn.__version__)
Pandas version: 2.2.2
NumPy version: 1.26.4
Seaborn version: 0.13.2
Matplotlib version: 3.8.4
SHAP version: 0.46.0
Sklearn version: 1.4.2
In [57]:
dtree.feature_importances_
Out[57]:
array([0.08642484, 0.01989924, 0.06002654, 0.6563129 , 0.02074202,
       0.01880156, 0.1377929 ])
In [71]:
sorted_idx = dtree.feature_importances_.argsort()
pd.Series(dtree.feature_importances_[sorted_idx],index=X.columns).plot(kind='barh')
plt.title("Decision Tree Feature Importance")
Out[71]:
Text(0.5, 1.0, 'Decision Tree Feature Importance')
No description has been provided for this image
In [43]:
pip install --upgrade scikit-learn
Requirement already satisfied: scikit-learn in c:\users\viswanathan\anaconda4\lib\site-packages (1.5.1)
Requirement already satisfied: numpy>=1.19.5 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn) (1.26.4)
Requirement already satisfied: scipy>=1.6.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn) (1.13.1)
Requirement already satisfied: joblib>=1.2.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn) (1.4.2)
Requirement already satisfied: threadpoolctl>=3.1.0 in c:\users\viswanathan\anaconda4\lib\site-packages (from scikit-learn) (3.5.0)
Note: you may need to restart the kernel to use updated packages.
In [54]:
# To find the performance of the model calculated accuracy.
cm=confusion_matrix(y_test, svc_pred)
cm=(cm/1762*100)
cm.round()
print(cm)
print(classification_report(y_test, svc_pred))
[[59.30760499  1.75936436]
 [ 5.61861521 33.31441544]]
              precision    recall  f1-score   support

           0       0.91      0.97      0.94      1076
           1       0.95      0.86      0.90       686

    accuracy                           0.93      1762
   macro avg       0.93      0.91      0.92      1762
weighted avg       0.93      0.93      0.93      1762

In [53]:
cm=confusion_matrix(y_test, rfc_predictions)
cm=(cm/1762*100)
cm.round()
print(cm)
print(classification_report(y_test, rfc_predictions))
[[58.7400681   2.32690125]
 [ 2.44040863 36.49262202]]
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      1076
           1       0.94      0.94      0.94       686

    accuracy                           0.95      1762
   macro avg       0.95      0.95      0.95      1762
weighted avg       0.95      0.95      0.95      1762

In [ ]:
 
In [144]:
cm=confusion_matrix(y_test, AdaB_predictions)
cm=(cm/1762*100)
cm.round()
print(cm)
print(classification_report(y_test, AdaB_predictions))
[[58.56980704  2.49716232]
 [ 2.83768445 36.0953462 ]]
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      1076
           1       0.94      0.93      0.93       686

    accuracy                           0.95      1762
   macro avg       0.94      0.94      0.94      1762
weighted avg       0.95      0.95      0.95      1762

In [52]:
cm=confusion_matrix(y_test, LR_predictions)
cm=(cm/1762*100)

print(cm)
print(classification_report(y_test, LR_predictions))
[[59.19409762  1.87287174]
 [ 5.78887628 33.14415437]]
              precision    recall  f1-score   support

           0       0.91      0.97      0.94      1076
           1       0.95      0.85      0.90       686

    accuracy                           0.92      1762
   macro avg       0.93      0.91      0.92      1762
weighted avg       0.92      0.92      0.92      1762

In [142]:
cm=confusion_matrix(y_test, dtree_predictions)
cm=(cm/1762*100)
cm.round()
print(cm)
print(classification_report(y_test, dtree_predictions))
[[59.47786606  1.58910329]
 [ 1.5323496  37.40068104]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1076
           1       0.96      0.96      0.96       686

    accuracy                           0.97      1762
   macro avg       0.97      0.97      0.97      1762
weighted avg       0.97      0.97      0.97      1762

In [141]:
cm=confusion_matrix(y_test, Naivebayes_pred)
cm=(cm/1762*100)
cm.round()
print(cm)
print(classification_report(y_test, Naivebayes_pred))
[[59.02383655  2.0431328 ]
 [ 6.1861521  32.74687855]]
              precision    recall  f1-score   support

           0       0.91      0.97      0.93      1076
           1       0.94      0.84      0.89       686

    accuracy                           0.92      1762
   macro avg       0.92      0.90      0.91      1762
weighted avg       0.92      0.92      0.92      1762

In [ ]:
from sklearn.metrics import RocCurveDisplay
svc_disp = RocCurveDisplay.from_estimator(svc_model, X_test, y_test)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title("SupportVectorMachine")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.show()
In [70]:
pred_prob2 = rfc.predict_proba(X_test)
from sklearn.metrics import roc_curve
fpr5, tpr5, thresh5 = roc_curve(y_test, pred_prob1[:,1], pos_label=1)

# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)


from sklearn.metrics import roc_auc_score

# auc scores
auc_score5 = roc_auc_score(y_test, pred_prob1[:,1])
# plot roc curves
plt.plot(fpr5, tpr5, linestyle='--',color='violet', label='Random Forest')
plt.title('Random Forest ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')
# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();

print(auc_score5)
No description has been provided for this image
0.967309547292098
In [68]:
pred_prob4 = Naivebayes.predict_proba(X_test)
from sklearn.metrics import roc_curve
fpr4, tpr4, thresh4 = roc_curve(y_test, pred_prob1[:,1], pos_label=1)

# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)


from sklearn.metrics import roc_auc_score

# auc scores
auc_score4 = roc_auc_score(y_test, pred_prob1[:,1])
# plot roc curves
plt.plot(fpr4, tpr4, linestyle='--',color='Green', label='Naive Bayes')
plt.title('Naive Bayes ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')
# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();

print(auc_score4)
No description has been provided for this image
0.967309547292098
In [67]:
pred_prob = LR.predict_proba(X_test)
from sklearn.metrics import roc_curve
fpr3, tpr3, thresh3 = roc_curve(y_test, pred_prob1[:,1], pos_label=1)

# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)


from sklearn.metrics import roc_auc_score

# auc scores
auc_score3 = roc_auc_score(y_test, pred_prob1[:,1])
# plot roc curves
plt.plot(fpr3, tpr3, linestyle='--',color='Yellow', label='Logistic Regression')
plt.title('Logistic Regression ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')
# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();

print(auc_score3)
No description has been provided for this image
0.967309547292098
In [65]:
pred_prob2 = dtree.predict_proba(X_test)
from sklearn.metrics import roc_curve
fpr2, tpr2, thresh2 = roc_curve(y_test, pred_prob1[:,1], pos_label=1)

# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)


from sklearn.metrics import roc_auc_score

# auc scores
auc_score2 = roc_auc_score(y_test, pred_prob1[:,1])
# plot roc curves
plt.plot(fpr2, tpr2, linestyle='--',color='Red', label='Decision Tree')
plt.title('Decision Tree ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')
# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();

print(auc_score2)
No description has been provided for this image
0.967309547292098
In [63]:
# predict probabilities
pred_prob1 = AdaB.predict_proba(X_test)

from sklearn.metrics import roc_curve
fpr1, tpr1, thresh1 = roc_curve(y_test, pred_prob1[:,1], pos_label=1)

# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)


from sklearn.metrics import roc_auc_score

# auc scores
auc_score1 = roc_auc_score(y_test, pred_prob1[:,1])
# plot roc curves
plt.plot(fpr1, tpr1, linestyle='--',color='orange', label='AdaBoost')
plt.title('AdaBoost ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')
# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();

print(auc_score1)
No description has been provided for this image
0.9852662652952843
In [48]:
# predict probabilities
pred_prob1 = AdaB.predict_proba(X_test)
pred_prob2 = dtree.predict_proba(X_test)
pred_prob3 = LR.predict_proba(X_test)
pred_prob4 = rfc.predict_proba(X_test)
pred_prob5 = Naivebayes.predict_proba(X_test)

from sklearn.metrics import roc_curve
fpr1, tpr1, thresh1 = roc_curve(y_test, pred_prob1[:,1], pos_label=1)
fpr2, tpr2, thresh2 = roc_curve(y_test, pred_prob2[:,1], pos_label=1)
fpr3, tpr3, thresh3 = roc_curve(y_test, pred_prob3[:,1], pos_label=1)
fpr4, tpr4, thresh4 = roc_curve(y_test, pred_prob4[:,1], pos_label=1)
fpr5, tpr5, thresh5 = roc_curve(y_test, pred_prob5[:,1], pos_label=1)

# roc curve for tpr = fpr 
random_probs = [0 for i in range(len(y_test))]
p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)


from sklearn.metrics import roc_auc_score

# auc scores
auc_score1 = roc_auc_score(y_test, pred_prob1[:,1])
auc_score2 = roc_auc_score(y_test, pred_prob2[:,1])
auc_score3 = roc_auc_score(y_test, pred_prob3[:,1])
auc_score4 = roc_auc_score(y_test, pred_prob4[:,1])
auc_score5 = roc_auc_score(y_test, pred_prob5[:,1])

print(auc_score1,auc_score2, auc_score3, auc_score4, auc_score5)
0.9852662652952843 0.9684394203778165 0.9417072192658263 0.9906778696608756 0.9412791138760337
In [50]:
# plot roc curves
plt.plot(fpr1, tpr1, linestyle='--',color='orange', label='AdaBoost')
plt.plot(fpr2, tpr2, linestyle='--',color='red', label='DecisionTree')
plt.plot(fpr3, tpr3, linestyle='--',color='yellow', label='LogisticRegression')
plt.plot(fpr4, tpr4, linestyle='--',color='green', label='RandomForest')
plt.plot(fpr5, tpr5, linestyle='--',color='black', label='NaiveBayes')
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
# title
plt.title('ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')

plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();
No description has been provided for this image
In [55]:
from sklearn.metrics import RocCurveDisplay
svc_disp = RocCurveDisplay.from_estimator(svc_model, X_test, y_test)
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title("SupportVectorMachine")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.show()
No description has been provided for this image
In [78]:
cm=confusion_matrix(y_test, Naivebayes_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[78], line 3
      1 cm=confusion_matrix(y_test, Naivebayes_pred)
      2 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
----> 3 disp.plot(cmap=plt.cm.Blues)
      4 plt.title('Confusion Matrix')
      5 plt.show()

File ~\anaconda4\Lib\site-packages\sklearn\metrics\_plot\confusion_matrix.py:181, in ConfusionMatrixDisplay.plot(self, include_values, cmap, xticks_rotation, values_format, ax, colorbar, im_kw, text_kw)
    179 if colorbar:
    180     fig.colorbar(self.im_, ax=ax)
--> 181 ax.set(
    182     xticks=np.arange(n_classes),
    183     yticks=np.arange(n_classes),
    184     xticklabels=display_labels,
    185     yticklabels=display_labels,
    186     ylabel="True label",
    187     xlabel="Predicted label",
    188 )
    190 ax.set_ylim((n_classes - 0.5, -0.5))
    191 plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:147, in Artist.__init_subclass__.<locals>.<lambda>(self, **kwargs)
    139 if not hasattr(cls.set, '_autogenerated_signature'):
    140     # Don't overwrite cls.set if the subclass or one of its parents
    141     # has defined a set method set itself.
    142     # If there was no explicit definition, cls.set is inherited from
    143     # the hierarchy of auto-generated set methods, which hold the
    144     # flag _autogenerated_signature.
    145     return
--> 147 cls.set = lambda self, **kwargs: Artist.set(self, **kwargs)
    148 cls.set.__name__ = "set"
    149 cls.set.__qualname__ = f"{cls.__qualname__}.set"

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1227, in Artist.set(self, **kwargs)
   1223 def set(self, **kwargs):
   1224     # docstring and signature are auto-generated via
   1225     # Artist._update_set_signature_and_docstring() at the end of the
   1226     # module.
-> 1227     return self._internal_update(cbook.normalize_kwargs(kwargs, self))

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1219, in Artist._internal_update(self, kwargs)
   1212 def _internal_update(self, kwargs):
   1213     """
   1214     Update artist properties without prenormalizing them, but generating
   1215     errors as if calling `set`.
   1216 
   1217     The lack of prenormalization is to maintain backcompatibility.
   1218     """
-> 1219     return self._update_props(
   1220         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1221         "{prop_name!r}")

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1195, in Artist._update_props(self, props, errfmt)
   1192             if not callable(func):
   1193                 raise AttributeError(
   1194                     errfmt.format(cls=type(self), prop_name=k))
-> 1195             ret.append(func(v))
   1196 if ret:
   1197     self.pchanged()

File ~\anaconda4\Lib\site-packages\matplotlib\axes\_base.py:73, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
     72 def wrapper(self, *args, **kwargs):
---> 73     return get_method(self)(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
    292     warn_deprecated(
    293         since, message=f"The {old!r} parameter of {func.__name__}() "
    294         f"has been renamed {new!r} since Matplotlib {since}; support "
    295         f"for the old name will be dropped %(removal)s.")
    296     kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\axis.py:2025, in Axis.set_ticklabels(self, labels, minor, fontdict, **kwargs)
   2021 elif isinstance(locator, mticker.FixedLocator):
   2022     # Passing [] as a list of labels is often used as a way to
   2023     # remove all tick labels, so only error for > 0 labels
   2024     if len(locator.locs) != len(labels) and len(labels) != 0:
-> 2025         raise ValueError(
   2026             "The number of FixedLocator locations"
   2027             f" ({len(locator.locs)}), usually from a call to"
   2028             " set_ticks, does not match"
   2029             f" the number of labels ({len(labels)}).")
   2030     tickd = {loc: lab for loc, lab in zip(locator.locs, labels)}
   2031     func = functools.partial(self._format_with_dict, tickd)

ValueError: The number of FixedLocator locations (2), usually from a call to set_ticks, does not match the number of labels (7).
No description has been provided for this image
In [51]:
cm=confusion_matrix(y_test, rfc_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[51], line 3
      1 cm=confusion_matrix(y_test, rfc_predictions)
      2 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
----> 3 disp.plot(cmap=plt.cm.Blues)
      4 plt.title('Confusion Matrix')
      5 plt.show()

File ~\anaconda4\Lib\site-packages\sklearn\metrics\_plot\confusion_matrix.py:181, in ConfusionMatrixDisplay.plot(self, include_values, cmap, xticks_rotation, values_format, ax, colorbar, im_kw, text_kw)
    179 if colorbar:
    180     fig.colorbar(self.im_, ax=ax)
--> 181 ax.set(
    182     xticks=np.arange(n_classes),
    183     yticks=np.arange(n_classes),
    184     xticklabels=display_labels,
    185     yticklabels=display_labels,
    186     ylabel="True label",
    187     xlabel="Predicted label",
    188 )
    190 ax.set_ylim((n_classes - 0.5, -0.5))
    191 plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:147, in Artist.__init_subclass__.<locals>.<lambda>(self, **kwargs)
    139 if not hasattr(cls.set, '_autogenerated_signature'):
    140     # Don't overwrite cls.set if the subclass or one of its parents
    141     # has defined a set method set itself.
    142     # If there was no explicit definition, cls.set is inherited from
    143     # the hierarchy of auto-generated set methods, which hold the
    144     # flag _autogenerated_signature.
    145     return
--> 147 cls.set = lambda self, **kwargs: Artist.set(self, **kwargs)
    148 cls.set.__name__ = "set"
    149 cls.set.__qualname__ = f"{cls.__qualname__}.set"

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1227, in Artist.set(self, **kwargs)
   1223 def set(self, **kwargs):
   1224     # docstring and signature are auto-generated via
   1225     # Artist._update_set_signature_and_docstring() at the end of the
   1226     # module.
-> 1227     return self._internal_update(cbook.normalize_kwargs(kwargs, self))

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1219, in Artist._internal_update(self, kwargs)
   1212 def _internal_update(self, kwargs):
   1213     """
   1214     Update artist properties without prenormalizing them, but generating
   1215     errors as if calling `set`.
   1216 
   1217     The lack of prenormalization is to maintain backcompatibility.
   1218     """
-> 1219     return self._update_props(
   1220         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1221         "{prop_name!r}")

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1195, in Artist._update_props(self, props, errfmt)
   1192             if not callable(func):
   1193                 raise AttributeError(
   1194                     errfmt.format(cls=type(self), prop_name=k))
-> 1195             ret.append(func(v))
   1196 if ret:
   1197     self.pchanged()

File ~\anaconda4\Lib\site-packages\matplotlib\axes\_base.py:73, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
     72 def wrapper(self, *args, **kwargs):
---> 73     return get_method(self)(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
    292     warn_deprecated(
    293         since, message=f"The {old!r} parameter of {func.__name__}() "
    294         f"has been renamed {new!r} since Matplotlib {since}; support "
    295         f"for the old name will be dropped %(removal)s.")
    296     kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\axis.py:2025, in Axis.set_ticklabels(self, labels, minor, fontdict, **kwargs)
   2021 elif isinstance(locator, mticker.FixedLocator):
   2022     # Passing [] as a list of labels is often used as a way to
   2023     # remove all tick labels, so only error for > 0 labels
   2024     if len(locator.locs) != len(labels) and len(labels) != 0:
-> 2025         raise ValueError(
   2026             "The number of FixedLocator locations"
   2027             f" ({len(locator.locs)}), usually from a call to"
   2028             " set_ticks, does not match"
   2029             f" the number of labels ({len(labels)}).")
   2030     tickd = {loc: lab for loc, lab in zip(locator.locs, labels)}
   2031     func = functools.partial(self._format_with_dict, tickd)

ValueError: The number of FixedLocator locations (2), usually from a call to set_ticks, does not match the number of labels (7).
No description has been provided for this image
In [55]:
cm=confusion_matrix(y_test, dtree_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[55], line 3
      1 cm=confusion_matrix(y_test, dtree_predictions)
      2 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
----> 3 disp.plot(cmap=plt.cm.Blues)
      4 plt.title('Confusion Matrix')
      5 plt.show()

File ~\anaconda4\Lib\site-packages\sklearn\metrics\_plot\confusion_matrix.py:181, in ConfusionMatrixDisplay.plot(self, include_values, cmap, xticks_rotation, values_format, ax, colorbar, im_kw, text_kw)
    179 if colorbar:
    180     fig.colorbar(self.im_, ax=ax)
--> 181 ax.set(
    182     xticks=np.arange(n_classes),
    183     yticks=np.arange(n_classes),
    184     xticklabels=display_labels,
    185     yticklabels=display_labels,
    186     ylabel="True label",
    187     xlabel="Predicted label",
    188 )
    190 ax.set_ylim((n_classes - 0.5, -0.5))
    191 plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:147, in Artist.__init_subclass__.<locals>.<lambda>(self, **kwargs)
    139 if not hasattr(cls.set, '_autogenerated_signature'):
    140     # Don't overwrite cls.set if the subclass or one of its parents
    141     # has defined a set method set itself.
    142     # If there was no explicit definition, cls.set is inherited from
    143     # the hierarchy of auto-generated set methods, which hold the
    144     # flag _autogenerated_signature.
    145     return
--> 147 cls.set = lambda self, **kwargs: Artist.set(self, **kwargs)
    148 cls.set.__name__ = "set"
    149 cls.set.__qualname__ = f"{cls.__qualname__}.set"

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1227, in Artist.set(self, **kwargs)
   1223 def set(self, **kwargs):
   1224     # docstring and signature are auto-generated via
   1225     # Artist._update_set_signature_and_docstring() at the end of the
   1226     # module.
-> 1227     return self._internal_update(cbook.normalize_kwargs(kwargs, self))

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1219, in Artist._internal_update(self, kwargs)
   1212 def _internal_update(self, kwargs):
   1213     """
   1214     Update artist properties without prenormalizing them, but generating
   1215     errors as if calling `set`.
   1216 
   1217     The lack of prenormalization is to maintain backcompatibility.
   1218     """
-> 1219     return self._update_props(
   1220         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1221         "{prop_name!r}")

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1195, in Artist._update_props(self, props, errfmt)
   1192             if not callable(func):
   1193                 raise AttributeError(
   1194                     errfmt.format(cls=type(self), prop_name=k))
-> 1195             ret.append(func(v))
   1196 if ret:
   1197     self.pchanged()

File ~\anaconda4\Lib\site-packages\matplotlib\axes\_base.py:73, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
     72 def wrapper(self, *args, **kwargs):
---> 73     return get_method(self)(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
    292     warn_deprecated(
    293         since, message=f"The {old!r} parameter of {func.__name__}() "
    294         f"has been renamed {new!r} since Matplotlib {since}; support "
    295         f"for the old name will be dropped %(removal)s.")
    296     kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\axis.py:2025, in Axis.set_ticklabels(self, labels, minor, fontdict, **kwargs)
   2021 elif isinstance(locator, mticker.FixedLocator):
   2022     # Passing [] as a list of labels is often used as a way to
   2023     # remove all tick labels, so only error for > 0 labels
   2024     if len(locator.locs) != len(labels) and len(labels) != 0:
-> 2025         raise ValueError(
   2026             "The number of FixedLocator locations"
   2027             f" ({len(locator.locs)}), usually from a call to"
   2028             " set_ticks, does not match"
   2029             f" the number of labels ({len(labels)}).")
   2030     tickd = {loc: lab for loc, lab in zip(locator.locs, labels)}
   2031     func = functools.partial(self._format_with_dict, tickd)

ValueError: The number of FixedLocator locations (2), usually from a call to set_ticks, does not match the number of labels (7).
No description has been provided for this image
In [56]:
cm=confusion_matrix(y_test, svc_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[56], line 3
      1 cm=confusion_matrix(y_test, svc_pred)
      2 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
----> 3 disp.plot(cmap=plt.cm.Blues)
      4 plt.title('Confusion Matrix')
      5 plt.show()

File ~\anaconda4\Lib\site-packages\sklearn\metrics\_plot\confusion_matrix.py:181, in ConfusionMatrixDisplay.plot(self, include_values, cmap, xticks_rotation, values_format, ax, colorbar, im_kw, text_kw)
    179 if colorbar:
    180     fig.colorbar(self.im_, ax=ax)
--> 181 ax.set(
    182     xticks=np.arange(n_classes),
    183     yticks=np.arange(n_classes),
    184     xticklabels=display_labels,
    185     yticklabels=display_labels,
    186     ylabel="True label",
    187     xlabel="Predicted label",
    188 )
    190 ax.set_ylim((n_classes - 0.5, -0.5))
    191 plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:147, in Artist.__init_subclass__.<locals>.<lambda>(self, **kwargs)
    139 if not hasattr(cls.set, '_autogenerated_signature'):
    140     # Don't overwrite cls.set if the subclass or one of its parents
    141     # has defined a set method set itself.
    142     # If there was no explicit definition, cls.set is inherited from
    143     # the hierarchy of auto-generated set methods, which hold the
    144     # flag _autogenerated_signature.
    145     return
--> 147 cls.set = lambda self, **kwargs: Artist.set(self, **kwargs)
    148 cls.set.__name__ = "set"
    149 cls.set.__qualname__ = f"{cls.__qualname__}.set"

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1227, in Artist.set(self, **kwargs)
   1223 def set(self, **kwargs):
   1224     # docstring and signature are auto-generated via
   1225     # Artist._update_set_signature_and_docstring() at the end of the
   1226     # module.
-> 1227     return self._internal_update(cbook.normalize_kwargs(kwargs, self))

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1219, in Artist._internal_update(self, kwargs)
   1212 def _internal_update(self, kwargs):
   1213     """
   1214     Update artist properties without prenormalizing them, but generating
   1215     errors as if calling `set`.
   1216 
   1217     The lack of prenormalization is to maintain backcompatibility.
   1218     """
-> 1219     return self._update_props(
   1220         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1221         "{prop_name!r}")

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1195, in Artist._update_props(self, props, errfmt)
   1192             if not callable(func):
   1193                 raise AttributeError(
   1194                     errfmt.format(cls=type(self), prop_name=k))
-> 1195             ret.append(func(v))
   1196 if ret:
   1197     self.pchanged()

File ~\anaconda4\Lib\site-packages\matplotlib\axes\_base.py:73, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
     72 def wrapper(self, *args, **kwargs):
---> 73     return get_method(self)(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
    292     warn_deprecated(
    293         since, message=f"The {old!r} parameter of {func.__name__}() "
    294         f"has been renamed {new!r} since Matplotlib {since}; support "
    295         f"for the old name will be dropped %(removal)s.")
    296     kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\axis.py:2025, in Axis.set_ticklabels(self, labels, minor, fontdict, **kwargs)
   2021 elif isinstance(locator, mticker.FixedLocator):
   2022     # Passing [] as a list of labels is often used as a way to
   2023     # remove all tick labels, so only error for > 0 labels
   2024     if len(locator.locs) != len(labels) and len(labels) != 0:
-> 2025         raise ValueError(
   2026             "The number of FixedLocator locations"
   2027             f" ({len(locator.locs)}), usually from a call to"
   2028             " set_ticks, does not match"
   2029             f" the number of labels ({len(labels)}).")
   2030     tickd = {loc: lab for loc, lab in zip(locator.locs, labels)}
   2031     func = functools.partial(self._format_with_dict, tickd)

ValueError: The number of FixedLocator locations (2), usually from a call to set_ticks, does not match the number of labels (7).
No description has been provided for this image
In [57]:
cm=confusion_matrix(y_test, AdaB_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[57], line 3
      1 cm=confusion_matrix(y_test, AdaB_predictions)
      2 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
----> 3 disp.plot(cmap=plt.cm.Blues)
      4 plt.title('Confusion Matrix')
      5 plt.show()

File ~\anaconda4\Lib\site-packages\sklearn\metrics\_plot\confusion_matrix.py:181, in ConfusionMatrixDisplay.plot(self, include_values, cmap, xticks_rotation, values_format, ax, colorbar, im_kw, text_kw)
    179 if colorbar:
    180     fig.colorbar(self.im_, ax=ax)
--> 181 ax.set(
    182     xticks=np.arange(n_classes),
    183     yticks=np.arange(n_classes),
    184     xticklabels=display_labels,
    185     yticklabels=display_labels,
    186     ylabel="True label",
    187     xlabel="Predicted label",
    188 )
    190 ax.set_ylim((n_classes - 0.5, -0.5))
    191 plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:147, in Artist.__init_subclass__.<locals>.<lambda>(self, **kwargs)
    139 if not hasattr(cls.set, '_autogenerated_signature'):
    140     # Don't overwrite cls.set if the subclass or one of its parents
    141     # has defined a set method set itself.
    142     # If there was no explicit definition, cls.set is inherited from
    143     # the hierarchy of auto-generated set methods, which hold the
    144     # flag _autogenerated_signature.
    145     return
--> 147 cls.set = lambda self, **kwargs: Artist.set(self, **kwargs)
    148 cls.set.__name__ = "set"
    149 cls.set.__qualname__ = f"{cls.__qualname__}.set"

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1227, in Artist.set(self, **kwargs)
   1223 def set(self, **kwargs):
   1224     # docstring and signature are auto-generated via
   1225     # Artist._update_set_signature_and_docstring() at the end of the
   1226     # module.
-> 1227     return self._internal_update(cbook.normalize_kwargs(kwargs, self))

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1219, in Artist._internal_update(self, kwargs)
   1212 def _internal_update(self, kwargs):
   1213     """
   1214     Update artist properties without prenormalizing them, but generating
   1215     errors as if calling `set`.
   1216 
   1217     The lack of prenormalization is to maintain backcompatibility.
   1218     """
-> 1219     return self._update_props(
   1220         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1221         "{prop_name!r}")

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1195, in Artist._update_props(self, props, errfmt)
   1192             if not callable(func):
   1193                 raise AttributeError(
   1194                     errfmt.format(cls=type(self), prop_name=k))
-> 1195             ret.append(func(v))
   1196 if ret:
   1197     self.pchanged()

File ~\anaconda4\Lib\site-packages\matplotlib\axes\_base.py:73, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
     72 def wrapper(self, *args, **kwargs):
---> 73     return get_method(self)(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
    292     warn_deprecated(
    293         since, message=f"The {old!r} parameter of {func.__name__}() "
    294         f"has been renamed {new!r} since Matplotlib {since}; support "
    295         f"for the old name will be dropped %(removal)s.")
    296     kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\axis.py:2025, in Axis.set_ticklabels(self, labels, minor, fontdict, **kwargs)
   2021 elif isinstance(locator, mticker.FixedLocator):
   2022     # Passing [] as a list of labels is often used as a way to
   2023     # remove all tick labels, so only error for > 0 labels
   2024     if len(locator.locs) != len(labels) and len(labels) != 0:
-> 2025         raise ValueError(
   2026             "The number of FixedLocator locations"
   2027             f" ({len(locator.locs)}), usually from a call to"
   2028             " set_ticks, does not match"
   2029             f" the number of labels ({len(labels)}).")
   2030     tickd = {loc: lab for loc, lab in zip(locator.locs, labels)}
   2031     func = functools.partial(self._format_with_dict, tickd)

ValueError: The number of FixedLocator locations (2), usually from a call to set_ticks, does not match the number of labels (7).
No description has been provided for this image
In [58]:
cm=confusion_matrix(y_test, LR_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[58], line 3
      1 cm=confusion_matrix(y_test, LR_predictions)
      2 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= "Outcome")
----> 3 disp.plot(cmap=plt.cm.Blues)
      4 plt.title('Confusion Matrix')
      5 plt.show()

File ~\anaconda4\Lib\site-packages\sklearn\metrics\_plot\confusion_matrix.py:181, in ConfusionMatrixDisplay.plot(self, include_values, cmap, xticks_rotation, values_format, ax, colorbar, im_kw, text_kw)
    179 if colorbar:
    180     fig.colorbar(self.im_, ax=ax)
--> 181 ax.set(
    182     xticks=np.arange(n_classes),
    183     yticks=np.arange(n_classes),
    184     xticklabels=display_labels,
    185     yticklabels=display_labels,
    186     ylabel="True label",
    187     xlabel="Predicted label",
    188 )
    190 ax.set_ylim((n_classes - 0.5, -0.5))
    191 plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:147, in Artist.__init_subclass__.<locals>.<lambda>(self, **kwargs)
    139 if not hasattr(cls.set, '_autogenerated_signature'):
    140     # Don't overwrite cls.set if the subclass or one of its parents
    141     # has defined a set method set itself.
    142     # If there was no explicit definition, cls.set is inherited from
    143     # the hierarchy of auto-generated set methods, which hold the
    144     # flag _autogenerated_signature.
    145     return
--> 147 cls.set = lambda self, **kwargs: Artist.set(self, **kwargs)
    148 cls.set.__name__ = "set"
    149 cls.set.__qualname__ = f"{cls.__qualname__}.set"

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1227, in Artist.set(self, **kwargs)
   1223 def set(self, **kwargs):
   1224     # docstring and signature are auto-generated via
   1225     # Artist._update_set_signature_and_docstring() at the end of the
   1226     # module.
-> 1227     return self._internal_update(cbook.normalize_kwargs(kwargs, self))

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1219, in Artist._internal_update(self, kwargs)
   1212 def _internal_update(self, kwargs):
   1213     """
   1214     Update artist properties without prenormalizing them, but generating
   1215     errors as if calling `set`.
   1216 
   1217     The lack of prenormalization is to maintain backcompatibility.
   1218     """
-> 1219     return self._update_props(
   1220         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1221         "{prop_name!r}")

File ~\anaconda4\Lib\site-packages\matplotlib\artist.py:1195, in Artist._update_props(self, props, errfmt)
   1192             if not callable(func):
   1193                 raise AttributeError(
   1194                     errfmt.format(cls=type(self), prop_name=k))
-> 1195             ret.append(func(v))
   1196 if ret:
   1197     self.pchanged()

File ~\anaconda4\Lib\site-packages\matplotlib\axes\_base.py:73, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
     72 def wrapper(self, *args, **kwargs):
---> 73     return get_method(self)(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
    292     warn_deprecated(
    293         since, message=f"The {old!r} parameter of {func.__name__}() "
    294         f"has been renamed {new!r} since Matplotlib {since}; support "
    295         f"for the old name will be dropped %(removal)s.")
    296     kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)

File ~\anaconda4\Lib\site-packages\matplotlib\axis.py:2025, in Axis.set_ticklabels(self, labels, minor, fontdict, **kwargs)
   2021 elif isinstance(locator, mticker.FixedLocator):
   2022     # Passing [] as a list of labels is often used as a way to
   2023     # remove all tick labels, so only error for > 0 labels
   2024     if len(locator.locs) != len(labels) and len(labels) != 0:
-> 2025         raise ValueError(
   2026             "The number of FixedLocator locations"
   2027             f" ({len(locator.locs)}), usually from a call to"
   2028             " set_ticks, does not match"
   2029             f" the number of labels ({len(labels)}).")
   2030     tickd = {loc: lab for loc, lab in zip(locator.locs, labels)}
   2031     func = functools.partial(self._format_with_dict, tickd)

ValueError: The number of FixedLocator locations (2), usually from a call to set_ticks, does not match the number of labels (7).
No description has been provided for this image
In [ ]: